{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pdfplumber"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Unnamed: 0</th>\n",
       "      <th>average</th>\n",
       "      <th>country</th>\n",
       "      <th>genre</th>\n",
       "      <th>language</th>\n",
       "      <th>release_date</th>\n",
       "      <th>title</th>\n",
       "      <th>votes</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>9.7</td>\n",
       "      <td>美国</td>\n",
       "      <td>['剧情', '犯罪']</td>\n",
       "      <td>英语</td>\n",
       "      <td>1994-09-10</td>\n",
       "      <td>肖申克的救赎 The Shawshank Redemption</td>\n",
       "      <td>2083437.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>9.2</td>\n",
       "      <td>美国</td>\n",
       "      <td>['喜剧', '动画', '冒险']</td>\n",
       "      <td>英语 / 挪威语</td>\n",
       "      <td>2016-03-04</td>\n",
       "      <td>疯狂动物城 Zootopia</td>\n",
       "      <td>1321807.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>9.5</td>\n",
       "      <td>美国</td>\n",
       "      <td>['剧情', '爱情']</td>\n",
       "      <td>英语</td>\n",
       "      <td>1994-06-23</td>\n",
       "      <td>阿甘正传 Forrest Gump</td>\n",
       "      <td>1573820.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>9.0</td>\n",
       "      <td>中国大陆</td>\n",
       "      <td>['剧情', '喜剧']</td>\n",
       "      <td>汉语普通话 / 英语 / 上海话 / 印地语</td>\n",
       "      <td>2018-07-05</td>\n",
       "      <td>我不是药神</td>\n",
       "      <td>1529042.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>9.4</td>\n",
       "      <td>日本</td>\n",
       "      <td>['剧情', '动画', '奇幻']</td>\n",
       "      <td>日语</td>\n",
       "      <td>2019-06-21</td>\n",
       "      <td>千与千寻 千と千尋の神隠し</td>\n",
       "      <td>1636148.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>8.9</td>\n",
       "      <td>美国</td>\n",
       "      <td>['剧情', '喜剧', '传记']</td>\n",
       "      <td>英语 / 意大利语 / 俄语 / 德语</td>\n",
       "      <td>2019-03-01</td>\n",
       "      <td>绿皮书 Green Book</td>\n",
       "      <td>1109388.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>9.4</td>\n",
       "      <td>美国</td>\n",
       "      <td>['剧情', '爱情', '灾难']</td>\n",
       "      <td>英语 / 意大利语 / 德语 / 俄语</td>\n",
       "      <td>1998-04-03</td>\n",
       "      <td>泰坦尼克号 Titanic</td>\n",
       "      <td>1527182.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Unnamed: 0  average country               genre                 language  \\\n",
       "0           0      9.7      美国        ['剧情', '犯罪']                       英语   \n",
       "1           1      9.2      美国  ['喜剧', '动画', '冒险']                 英语 / 挪威语   \n",
       "2           2      9.5      美国        ['剧情', '爱情']                       英语   \n",
       "3           3      9.0    中国大陆        ['剧情', '喜剧']   汉语普通话 / 英语 / 上海话 / 印地语   \n",
       "4           4      9.4      日本  ['剧情', '动画', '奇幻']                       日语   \n",
       "5           5      8.9      美国  ['剧情', '喜剧', '传记']      英语 / 意大利语 / 俄语 / 德语   \n",
       "6           6      9.4      美国  ['剧情', '爱情', '灾难']      英语 / 意大利语 / 德语 / 俄语   \n",
       "\n",
       "  release_date                            title      votes  \n",
       "0   1994-09-10  肖申克的救赎 The Shawshank Redemption  2083437.0  \n",
       "1   2016-03-04                   疯狂动物城 Zootopia  1321807.0  \n",
       "2   1994-06-23                阿甘正传 Forrest Gump  1573820.0  \n",
       "3   2018-07-05                            我不是药神  1529042.0  \n",
       "4   2019-06-21                    千与千寻 千と千尋の神隠し  1636148.0  \n",
       "5   2019-03-01                   绿皮书 Green Book  1109388.0  \n",
       "6   1998-04-03                    泰坦尼克号 Titanic  1527182.0  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first 7 rows of data/movie.csv; the bare expression is rendered as the cell output.\n",
    "pd.read_csv(\"data/movie.csv\", nrows=7)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3.8.10 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "e5b51f9075b4cc1ea8d9810577a26807122690438b3a6e6e05129a402faed2ba"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
